## data analysis '53 paper 
## controlling for distance to nearest county capital

rm(list = ls())

library(readxl)
library(multiwayvcov)
library(lmtest)
library(Hmisc)

setwd("~/Dropbox/Current projects/1953/Replication archive")


## choose signal strength measure to employ
signal <- "lrm_4"

## should municipalities be dropped/recoded to account for possible jamming?
## jamming = km10, km12, km15; adjust = "drop", "min"
jamming <- ""
adjust <- ""

load(paste("1953data", signal, adjust, jamming, ".RData", sep = ""))


##  import data on nearest county capital
capital.dist <- read_excel("nearest_cap_061516.xlsx", 1)
dim(capital.dist)
colnames(capital.dist)

##  consistency checks
table(dat$id == capital.dist$id)
capital.dist$nearest.cap <- as.numeric(as.character(capital.dist$nearest.cap))

all(capital.dist$nearest.cap[capital.dist$county_cap == 1] == 0)
all(capital.dist$nearest.cap[capital.dist$county_cap == 0] > 0)

range(capital.dist$nearest.cap)


dat <- cbind(dat, capital.dist$nearest.cap)
colnames(dat)[88] <- "capital.dist"



## rescale distance to East Berlin so that coefficients are not too small
dat$distance_to_berlin <- dat$distance_to_berlin/100


table1 <- matrix(NA,46,4)
rownames(table1) <- c(	"intercept",
						"county capital",
						"KVP base",
						"$%$ agriculture and forestry",
						"$%$ mining",
						"$%$ metalworking industry",
						"$%$ chemical industry",
						"$%$ woods and plastics",
						"$%$ consumer goods",
						"$%$ construction",
						"$%$ transportation",
						"$%$ commerce",
						"$%$ services",
						"log(population size)",
						"$%$ population male",
						"population density",
						"$%$ population change 1939--50",
						"$%$ population in same {\\it Land}",
						"$%$ 8 years of education",
						"$%$ 10 years of education",
						"$%$ 12 years of education",
						"$%$ more than 12 years of education",
						"$%$ trade school degree",
						"$%$ university degree",
						"$%$ protestant",
						"$%$ catholic",
						"$%$ agnostic",
						"living space per capita (m$^2$)", 
						"$%$ turnout '32",
						"$%$ invalid votes '32",
						"$%$ NSDAP '32",
						"$%$ SPD '32",
						"$%$ KPD '32",
						"$%$ Zentrum '32",
						"$%$ DNVP '32",
						"$%$ DVP '32",
						"distance to nearest county capital (km)",
						"RIAS signal strength",
						"RIAS signal strength$^2$",
						"RIAS signal strength$^3$",
						"distance to East Berlin (100 km)",
						"distance to East Berlin$^2$ (100 km)",
						"distance to East Berlin$^3$ (100 km)",
						"Wald test RIAS $p$-value",
						"Wald test distance $p$-value",
						"district fixed effects")

table1[46,1] <- "No"
table1[46,3] <- "Yes"



############################################################
## model 1 --- RIAS and distance to Berlin; no fixed effects;
## distance to nearest county capital
############################################################
source("hl.R")

probit.out <- glm(protest ~

							county_capital +
							KVP +
							LandForstwirtschaft +
							Bergbau +
							Metallverarbeitung +
							Chemie +
							HolzKunstmassen +
							Verbrauchsgueter +
							Bauwirtschaft +
							Verkehrswesen +
							Handel +
							Dienstleistung +
							log(population) +
							prop_male +
							density +
							pop_change +
							prop_Land_same39 +
							prop_educ_8 +
							prop_educ_10 +
							prop_educ_12 +
							prop_educ_12plus +
							prop_educ_Fachschule +
							prop_educ_Hochschule +
							prop_evangelisch +
							prop_katholisch +
							prop_glaubenslos +
							living_space +
							turnout32 +
							invalid32 +
							NSDAP32 +
							SPD32 +
							KPD32 +
							Z32 +
							DNVP32 +
							DVP32 +

							capital.dist +

							signal + I(signal^2) + I(signal^3) +
							distance_to_berlin +
							I(distance_to_berlin^2) +
							I(distance_to_berlin^3) +
							as.factor(size),
							x = TRUE, y = TRUE, model = TRUE,
							data = dat, family = binomial(link = "probit"))	

summary(probit.out)


## clustered standard errors
vcov.cl <- cluster.vcov(probit.out, dat$county)
temp <- coeftest(probit.out, vcov.cl)

table1[1:43,1:2] <- hl(temp[1:43,1:2])


## Wald test RIAS
V <- vcov.cl
Q <- matrix(0,3,nrow(V))
Q[1, which(probit.out$coef == probit.out$coef["signal"])] <- 1
Q[2, which(probit.out$coef == probit.out$coef["I(signal^2)"])] <- 1
Q[3, which(probit.out$coef == probit.out$coef["I(signal^3)"])] <- 1

r <- rep(0,3)

theta <- probit.out$coef

W <- t(Q %*% theta - r) %*% solve(Q %*% V %*% t(Q)) %*% (Q %*% theta - r)

## compare to Chi^2 distribution to get p-value
table1[44,1] <- round(1 - pchisq(W, df = nrow(Q)),3)


## Wald test distance
V <- vcov.cl
Q <- matrix(0,3,nrow(V))
Q[1, which(probit.out$coef == probit.out$coef["distance_to_berlin"])] <- 1
Q[2, which(probit.out$coef == probit.out$coef["I(distance_to_berlin^2)"])] <- 1
Q[3, which(probit.out$coef == probit.out$coef["I(distance_to_berlin^3)"])] <- 1

r <- rep(0,3)

theta <- probit.out$coef

W <- t(Q %*% theta - r) %*% solve(Q %*% V %*% t(Q)) %*% (Q %*% theta - r)

## compare to Chi^2 distribution to get p-value
table1[45,1] <- round(1 - pchisq(W, df = nrow(Q)),3)



############################################################
## model 2 --- RIAS and distance to Berlin; fixed effects;
## distance to nearest county capital
############################################################
probit.out <- glm(protest ~

							county_capital +
							KVP +
							LandForstwirtschaft +
							Bergbau +
							Metallverarbeitung +
							Chemie +
							HolzKunstmassen +
							Verbrauchsgueter +
							Bauwirtschaft +
							Verkehrswesen +
							Handel +
							Dienstleistung +
							log(population) +
							prop_male +
							density +
							pop_change +
							prop_Land_same39 +
							prop_educ_8 +
							prop_educ_10 +
							prop_educ_12 +
							prop_educ_12plus +
							prop_educ_Fachschule +
							prop_educ_Hochschule +
							prop_evangelisch +
							prop_katholisch +
							prop_glaubenslos +
							living_space +
							turnout32 +
							invalid32 +
							NSDAP32 +
							SPD32 +
							KPD32 +
							Z32 +
							DNVP32 +
							DVP32 +

							capital.dist +

							signal + I(signal^2) + I(signal^3) +
							distance_to_berlin +
							I(distance_to_berlin^2) +
							I(distance_to_berlin^3) +
							as.factor(size) +
							as.factor(district),
							x = TRUE, y = TRUE, model = TRUE,
							data = dat, family = binomial(link = "probit"))	

summary(probit.out)


## clustered standard errors
vcov.cl <- cluster.vcov(probit.out, dat$county)
temp <- coeftest(probit.out, vcov.cl)

table1[1:43,3:4] <- hl(temp[1:43,1:2])


## Wald test RIAS
V <- vcov.cl
Q <- matrix(0,3,nrow(V))
Q[1, which(probit.out$coef == probit.out$coef["signal"])] <- 1
Q[2, which(probit.out$coef == probit.out$coef["I(signal^2)"])] <- 1
Q[3, which(probit.out$coef == probit.out$coef["I(signal^3)"])] <- 1

r <- rep(0,3)

theta <- probit.out$coef

W <- t(Q %*% theta - r) %*% solve(Q %*% V %*% t(Q)) %*% (Q %*% theta - r)

## compare to Chi^2 distribution to get p-value
table1[44,3] <- round(1 - pchisq(W, df = nrow(Q)),3)


## Wald test distance
V <- vcov.cl
Q <- matrix(0,3,nrow(V))
Q[1, which(probit.out$coef == probit.out$coef["distance_to_berlin"])] <- 1
Q[2, which(probit.out$coef == probit.out$coef["I(distance_to_berlin^2)"])] <- 1
Q[3, which(probit.out$coef == probit.out$coef["I(distance_to_berlin^3)"])] <- 1

r <- rep(0,3)

theta <- probit.out$coef

W <- t(Q %*% theta - r) %*% solve(Q %*% V %*% t(Q)) %*% (Q %*% theta - r)

## compare to Chi^2 distribution to get p-value
table1[45,3] <- round(1 - pchisq(W, df = nrow(Q)),3)


latex(table1, file = "")

.





